載入packages

library("Seurat")
## Loading required package: SeuratObject
## Loading required package: sp
## 'SeuratObject' was built under R 4.3.0 but the current version is
## 4.3.2; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed
## 'SeuratObject' was built with package 'Matrix' 1.6.3 but the current
## version is 1.6.4; it is recomended that you reinstall 'SeuratObject' as
## the ABI for 'Matrix' may have changed
## 
## Attaching package: 'SeuratObject'
## The following object is masked from 'package:base':
## 
##     intersect
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("magrittr")

將單一樣本資料前處理,寫成一個function(根據老師上課的模板與paper給予的參數調整) (1)讀檔 (matrix.mtx, barcode.tsv, and feature.tsv) (2)去除duplicated genes (3)資料載入seurat (4)計算mito gene的比例 (5)Cell QC: 複雜度500~6000,至少三顆細胞表達,粒線體基因不超過20% (6)Normalization: scaled to 10000 and log transformed (7)Identify highly variable genes

createSeuratObjEachSample <- function(project.name, 
                                      dir,
                                      min.cells = 3,  # paper
                                      min.features = 500,  # paper
                                      max.features = 6000,  # paper
                                      assay = "RNA",
                                      max.mt.percentage = 20,  # paper
                                      num.hvf = 2000,
                                      mt.pattern = "^mt-",
                                      normalization.method = "LogNormalize", # paper
                                      hvf.selection.method = "vst"){
  #### (1) Read files
  cat(c("Read files\n"))
  barcode.path <- file.path(dir, "barcodes.tsv")
  features.path <- file.path(dir, "genes.tsv")
  matrix.path <- file.path(dir, "matrix.mtx")
  mat <- Matrix::readMM(file = matrix.path)
  feature.names = read.delim(features.path,
                             header = FALSE,
                             stringsAsFactors = FALSE)
  barcode.names = read.delim(barcode.path,
                             header = FALSE,
                             stringsAsFactors = FALSE)
  colnames(mat) = paste(project.name, barcode.names$V1, sep = "_")
  rownames(mat) = feature.names$V2
  mat <- as.matrix(mat)
  cat(c("-> matrix:", dim(mat), "\n"))
  
  #### (2) 將基因名子一樣的資料加總起來
  cat(c("Remove duplicated genes\n"))
  IDfreqs <- table(rownames(mat))
  IDfreqs <- IDfreqs[IDfreqs > 1]
  cat(c("-> Num. dup. genes:", length(IDfreqs), "\n"))
  if (length(IDfreqs) > 0){
    for (i in names(IDfreqs)) {
      index <- which(rownames(mat) == i)
      duplicates <- as.matrix(mat[index, ])
      sums <- apply(duplicates, 2, sum, na.rm=TRUE)
      
      ### 置換成加總數值
      mat[index[1],] <- sums
      
      ### 將重複的資料移除
      for (j in length(index):2) {
        mat <- mat[-index[j], ]
      }
    }
  }
  cat(c("-> matrix:", dim(mat), "\n"))
  
  #### (3) 資料載入seurat
  cat(c("Create Seurat Object\n"))
  seurat.obj <- Seurat::CreateSeuratObject(counts = mat, 
                                           project = project.name,
                                           min.cells = min.cells, 
                                           min.features = min.features, 
                                           assay = assay, 
                                           meta.data = NULL)
  cat(c("-> dimension:", dim(seurat.obj), "\n"))
  
  #### (4) 計算mito gene的比例
  cat(c("Calculate MT percentage\n"))
  seurat.obj[["Mito_percent"]] <- Seurat::PercentageFeatureSet(seurat.obj, pattern = mt.pattern)
  
  #### (5) Cell QC
  cat(c("Cell QC\n"))
  min_nFeature <- min.features
  max_nFeature <- max.features
  max_Mito_percent <- max.mt.percentage
  seurat.obj <- seurat.obj %>%
    subset(., subset = nFeature_RNA > min_nFeature & nFeature_RNA < max_nFeature) %>%
    subset(., subset = Mito_percent < max_Mito_percent)  # paper
  cat(c("-> dimension:", dim(seurat.obj), "\n"))
  
  #### (6) Normalization
  cat(c("Normalization\n"))
  seurat.obj <- Seurat::NormalizeData(seurat.obj,
                                      normalization.method = normalization.method,
                                      scale.factor = 10000)  # paper
  
  #### (7) Identify highly variable genes
  cat(c("High variable genes\n"))
  seurat.obj <- Seurat::FindVariableFeatures(seurat.obj, 
                                             selection.method = hvf.selection.method, 
                                             nfeatures = num.hvf 
  )
  
  return(seurat.obj)
}

1.將每個樣本建構出Seurat object

k_seurat <- createSeuratObjEachSample (project.nam = "k", 
                                       dir = "K",
                                       min.cells = 3,
                                       min.features = 500,
                                       max.features = 6000,
                                       assay = "RNA",
                                       max.mt.percentage = 20,
                                       num.hvf = 2000,
                                       mt.pattern = "^mt-",
                                       normalization.method = "LogNormalize",
                                       hvf.selection.method = "vst")
## Read files
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 1.0 GiB
## -> matrix: 20304 6696 
## Remove duplicated genes
## -> Num. dup. genes: 0 
## -> matrix: 20304 6696 
## Create Seurat Object
## Warning: Data is of class matrix. Coercing to dgCMatrix.
## -> dimension: 18738 6696 
## Calculate MT percentage
## Cell QC
## -> dimension: 18738 6696 
## Normalization
## Normalizing layer: counts
## High variable genes
## Finding variable features for layer counts
kl_seurat <- createSeuratObjEachSample (project.nam = "kl", 
                                        dir = "KL",
                                        min.cells = 3,
                                        min.features = 500,
                                        max.features = 6000,
                                        assay = "RNA",
                                        max.mt.percentage = 20,
                                        num.hvf = 2000,
                                        mt.pattern = "^mt-",
                                        normalization.method = "LogNormalize",
                                        hvf.selection.method = "vst")
## Read files
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 1.1 GiB
## -> matrix: 20304 7564 
## Remove duplicated genes
## -> Num. dup. genes: 0 
## -> matrix: 20304 7564 
## Create Seurat Object
## Warning: Data is of class matrix. Coercing to dgCMatrix.
## -> dimension: 19458 7564 
## Calculate MT percentage
## Cell QC
## -> dimension: 19458 7564 
## Normalization
## Normalizing layer: counts
## High variable genes
## Finding variable features for layer counts

觀察兩個Seurat object

VlnPlot(k_seurat, features = c("nFeature_RNA", "nCount_RNA", "Mito_percent"), ncol = 3)

VlnPlot(kl_seurat, features = c("nFeature_RNA", "nCount_RNA", "Mito_percent"), ncol = 3)

  1. Data integration
data.list <- list("k" = k_seurat, "kl" = kl_seurat)
# select features that are repeatedly variable across datasets for integration
features <- Seurat::SelectIntegrationFeatures(object.list = data.list, 
                                              nfeatures = 2000,
                                              verbose = FALSE)
# identify anchors 
anchors <- Seurat::FindIntegrationAnchors(object.list = data.list, 
                                          anchor.features = features,
                                          verbose = F)
# creates 'integrated' data assay
combined.obj <- Seurat::IntegrateData(anchorset = anchors)
## Merging dataset 1 into 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
DefaultAssay(combined.obj) <- "integrated"
dim(combined.obj)
## [1]  2000 14260
combined.obj@meta.data$orig.ident %>% table()
## .
##    k   kl 
## 6696 7564
combined.obj@meta.data %>% head()
##                    orig.ident nCount_RNA nFeature_RNA Mito_percent
## k_AAACCCAAGACAAGCC          k       5388         1842    3.0252413
## k_AAACCCAAGATCGACG          k       8021         2329    2.9173420
## k_AAACCCAAGGCTGTAG          k       7292         2613    2.6330225
## k_AAACCCACAATAGAGT          k       2706          912    0.4065041
## k_AAACCCACAGATTAAG          k       4832         1579    3.7251656
## k_AAACCCAGTATGGTAA          k       8292         2217    3.3043898
  1. Run the standard workflow for visualization and clustering
combined.obj <- Seurat::ScaleData(combined.obj, verbose = FALSE)
combined.obj <- Seurat::RunPCA(combined.obj, npcs = 50, verbose = FALSE)
ElbowPlot(combined.obj, ndims = 50)

pcs <- 20   # 肉眼判斷為20(符合paper作法)

visualizing both cells and features that define the PCA

VizDimLoadings(combined.obj, dims = 1:2, reduction = "pca")

DimPlot(combined.obj, reduction = "pca") + NoLegend()

DimHeatmap(combined.obj, dims = 1, cells = 500, balanced = TRUE)

  1. Run non-linear dimensional reduction (UMAP/tSNE)
combined.obj <- combined.obj %>%
  Seurat::RunUMAP(reduction = "pca", umap.method = "uwot", dims = 1:pcs, verbose = FALSE) %>%
  Seurat::RunTSNE(reduction="pca", dims= 1:pcs, verbose = FALSE) %>%
  Seurat::FindNeighbors(reduction = "pca", dims = 1:pcs, verbose = FALSE) %>%
  Seurat::FindClusters(resolution = 0.06, verbose = FALSE) %>%  # paper提供resolution = 0.06
  Seurat::FindSubCluster("0",resolution = 0.06, graph.name = "integrated_snn", 
                         subcluster.name = "new_cluster")  # T cell進一步subcluster
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4739
## Number of edges: 192814
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9576
## Number of communities: 2
## Elapsed time: 0 seconds

觀察cluster狀況

Seurat::DimPlot(combined.obj, reduction="umap")  

Seurat::DimPlot(combined.obj, reduction="tsne")

Seurat::DimPlot(combined.obj, reduction="tsne", group.by = "orig.ident")

Seurat::DimPlot(combined.obj, reduction="umap", group.by = "new_cluster")

觀察Feature狀況

FeaturePlot(combined.obj, reduction="umap", features = "Mito_percent")

  1. Find markers for each cluster
markers <- FindAllMarkers(combined.obj, only.pos = TRUE) %>% 
  group_by(cluster) %>%
  dplyr::filter(avg_log2FC > 1) %>% 
  dplyr::arrange(desc(avg_log2FC)) %>% 
  slice_head(n = 10) %>%
  ungroup() -> top10
## Calculating cluster 0
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 1
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 2
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 3
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 4
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Warning in mean.fxn(object[features, cells.2, drop = FALSE]): NaNs produced
## Calculating cluster 5
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 6
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 7
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 8
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced

## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 9
# 將找到的marker進行視覺化,作heatmap
Seurat::DoHeatmap(combined.obj, group.by = "ident", features = top10$gene,draw.lines = TRUE) + NoLegend()

head(markers)
## # A tibble: 6 × 7
##       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
##       <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>  
## 1 8.33e- 63       5.01 0.155 0.061 1.67e- 59 0       Il17f  
## 2 4.75e-180       4.99 0.219 0.041 9.49e-177 0       Cd163l1
## 3 0               4.71 0.465 0.135 0         0       Dapl1  
## 4 1.75e-127       4.68 0.134 0.036 3.49e-124 0       Trgv2  
## 5 0               4.63 0.121 0.002 0         0       Trav4-2
## 6 3.39e- 62       4.46 0.186 0.091 6.78e- 59 0       Tcrg-C1

可以將特定gene作圖

FeaturePlot(combined.obj, reduction="umap", features = "Cd19", label = T)
## Warning: Could not find Cd19 in the default search locations, found in 'RNA'
## assay instead

VlnPlot(combined.obj, features = c("Cd19", "Cd3e"))
## Warning: Could not find Cd19 in the default search locations, found in 'RNA'
## assay instead
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead

6.annotation(from paper)

b_cell_mark <- c("Cd19","Cd22","Cd79a","Cd79b")
cancer_mark <- c("Epcam","Krt18","Krt8","Krt19")
dendritic_mark <-  c("Cd86","Cd83","Clec10a","Ccl22","Cxcl16","Xcr1","Rab7b","Naaa","Btla")
endo_mark <-  c("Pecam1","Tek","Plvap","Thbd","Tmem100","Adgrf5","Podxl","Nostrin","Acvrl1")
macrophage_mark <- c("Adgre1","Itgam","Itgal","Cd14","Ccl9","F13a1","Cx3cr1","Cd68")
neutrophil_mark <- c("Csf3r","Cxcr2S100a8")
nk_mark <- c("Klrd1","Nkg7","Prf1","Gzma","Gzmb","Ccl5","Cma1","Klre1","Klra4")
plasma_dend_mark <- c("Ccr9","Bst2")
t_cell_mark <- c("Cd3d","Cd5","Cd3e")

觀察cell cluster marker是否能夠代表該cluster

FeaturePlot(combined.obj, reduction="umap", features = b_cell_mark[1:4], label = T)
## Warning: Could not find Cd19 in the default search locations, found in 'RNA'
## assay instead
## Warning: Could not find Cd22 in the default search locations, found in 'RNA'
## assay instead

FeaturePlot(combined.obj, reduction="umap", features = cancer_mark[1:4], label = T)

FeaturePlot(combined.obj, reduction="umap", features = dendritic_mark[1:4], label = T)

FeaturePlot(combined.obj, reduction="umap", features = endo_mark[1:4], label = T)

FeaturePlot(combined.obj, reduction="umap", features = macrophage_mark[1:4], label = T)
## Warning: Could not find Itgal in the default search locations, found in 'RNA'
## assay instead

FeaturePlot(combined.obj, reduction="umap", features = neutrophil_mark, label = T)
## Warning: The following requested variables were not found: Cxcr2S100a8

FeaturePlot(combined.obj, reduction="umap", features = nk_mark[1:4], label = T)

FeaturePlot(combined.obj, reduction="umap", features = plasma_dend_mark, label = T)

# T cell
p1 <- FeaturePlot(combined.obj, reduction="umap", features = "Cd3e", label = T)  # T cell
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
p2 <- FeaturePlot(combined.obj, reduction="umap", features = "Tcf7", label = T)  # activated T
## Warning: Could not find Tcf7 in the default search locations, found in 'RNA'
## assay instead
p3 <- FeaturePlot(combined.obj, reduction="umap", features = "Ctla4", label = T) # exhausted T
cowplot::plot_grid(p1, p2, p3, ncol = 3)

VlnPlot(combined.obj, features = c("Cd3e","Tcf7","Ctla4"))
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
## Warning: Could not find Tcf7 in the default search locations, found in 'RNA'
## assay instead

cluster annotation(圖1d)

combined.obj$`new_cluster`[combined.obj$`new_cluster` == "0_0"] <- "T cell activated"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "0_1"] <- "T cell exhausted"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "1"] <- "Endothelial cell"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "2"] <- "Macrophage"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "3"] <- "B cell"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "4"] <- "Neutrophils"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "5"] <- "dendritic cell"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "6"] <- "NK cell"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "7"] <- "cancer cell1"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "8"] <- "cancer cell2"
combined.obj$`new_cluster`[combined.obj$`new_cluster` == "9"] <- "Plasmacytoid dendritic cell"

combined.obj <- SetIdent(combined.obj, value = combined.obj@meta.data$`new_cluster`)
# 查看annotation後的umap (圖1d)
Seurat::DimPlot(combined.obj, reduction="umap", label = T)  # 圖1d

7.visualization Ridge plots - from ggridges. Visualize single cell expression distributions in each cluster

features <- c("Cd3e","Ctla4")
RidgePlot(combined.obj, features = features, ncol = 1)
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead
## Picking joint bandwidth of 0.0237
## Picking joint bandwidth of 0.022

Violin plot - Visualize single cell expression distributions in each cluster

VlnPlot(combined.obj, features = features, ncol = 2)
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead

VlnPlot(combined.obj, features = features, split.by = "orig.ident")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead

FeaturePlot

FeaturePlot(combined.obj, features = features)
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead

FeaturePlot(combined.obj, features = features, reduction = "umap",blend = TRUE)
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead

FeaturePlot(combined.obj, features = features, split.by = "orig.ident")
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead

DotPlot -dot size corresponds to the percentage of cells expressing the feature

DotPlot(combined.obj, features = features) + RotatedAxis()
## Warning: Could not find Cd3e in the default search locations, found in 'RNA'
## assay instead

Heatmap

# Single cell heatmap of feature expression
DoHeatmap(subset(combined.obj, downsample = 100), features = top10$gene, size = 3) + NoLegend()

每個細胞的marker用dotplot做圖

# Identify conserved cell type markers
sg <- list(bcell = c("Cd19","Cd22","Cd79a","Cd79b"),
           cancer = c("Epcam","Krt18","Krt8","Krt19"),
           dendritic = c("Cd86","Cd83","Clec10a","Ccl22","Cxcl16","Xcr1","Rab7b","Naaa","Btla"),
           endo = c("Pecam1","Tek","Plvap","Thbd","Tmem100","Adgrf5","Podxl","Nostrin","Acvrl1"),
           macrophage = c("Adgre1","Itgam","Itgal","Cd14","Ccl9","F13a1","Cx3cr1","Cd68"),
           neutro= c("Csf3r","Cxcr2S100a8"),
           nkcell = c("Klrd1","Nkg7","Prf1","Gzma","Gzmb","Ccl5","Cma1","Klre1","Klra4"),
           plasma_dend = c("Ccr9","Bst2"),
           tcell = c("Cd3d","Cd5","Cd3e"))
DotPlot(combined.obj, features = sg, cols = c("blue", "red"), dot.scale = 4, assay = "RNA") +
  RotatedAxis()
## Warning: The following requested variables were not found: Cxcr2S100a8
## Warning: The `facets` argument of `facet_grid()` is deprecated as of ggplot2 2.2.0.
## ℹ Please use the `rows` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

8.計算細胞種類數量,並創建 bar plot(圖1f)

cell_proportions <- combined.obj@meta.data[,c(1,7)]
cell_ratio <- table(cell_proportions) %>% t() %>% as.data.frame()
k_total_cell <- cell_ratio$Freq[1:11] %>% sum()
kl_total_cell <- cell_ratio$Freq[12:22] %>% sum()
k_table <- cell_ratio[1:10,]
kl_table <- cell_ratio[11:22,]
k_table$ratio <- k_table$Freq/k_total_cell
kl_table$ratio <- kl_table$Freq/kl_total_cell
final <- rbind(k_table,kl_table) %>% set_names(c("cluster","ident","freq","ratio"))
head(final)
##            cluster ident freq       ratio
## 1           B cell     k  442 0.066009558
## 2     cancer cell1     k  313 0.046744325
## 3     cancer cell2     k   25 0.003733572
## 4   dendritic cell     k  230 0.034348865
## 5 Endothelial cell     k 1194 0.178315412
## 6       Macrophage     k 1236 0.184587814
library(ggplot2)
p <- ggplot(data = final, aes(x = ratio, y = ident, fill = cluster)) +
  geom_bar(stat = "identity") + theme_void()
p  # 圖1f

創建T cell exhausted violin plot(圖1g)

exh_T <- combined.obj %>%
  subset(., subset = new_cluster == "T cell exhausted")
exh_T <- SetIdent(exh_T, value = exh_T@meta.data$orig.ident) 
VlnPlot(exh_T, features = "Ctla4")  # 圖1g

9.抓出T cell進行分析

act_T <- combined.obj %>%
  subset(., subset = new_cluster == "T cell activated")
act_T <- SetIdent(act_T, value = act_T@meta.data$orig.ident) 
t_de_gene <- FindAllMarkers(act_T) %>% filter(cluster == "kl")
## Calculating cluster k
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster kl
## Warning in mean.fxn(object[features, cells.2, drop = FALSE]): NaNs produced
head(t_de_gene)
##           p_val avg_log2FC pct.1 pct.2 p_val_adj cluster    gene
## Egr3.1        0 -0.9327606 0.032 0.940         0      kl    Egr3
## Thbs1.1       0 -0.7934839 0.039 0.914         0      kl   Thbs1
## Mef2c.1       0 -0.3741167 0.022 0.897         0      kl   Mef2c
## Ager.1        0 -0.4077776 0.023 0.889         0      kl    Ager
## Afap1l1.1     0 -1.4697333 0.011 0.876         0      kl Afap1l1
## Alcam.1       0 -0.4896868 0.030 0.887         0      kl   Alcam

創建T cell activated violin plot(圖1g)

VlnPlot(act_T, features = "Tcf7")  # 圖1g
## Warning: Could not find Tcf7 in the default search locations, found in 'RNA'
## assay instead

創建基因list,並按照avg_log2FC由大到小排序

library(clusterProfiler)
## 
## clusterProfiler v4.10.0  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:stats':
## 
##     filter
library(enrichplot)

organism <- "org.Mm.eg.db"
list1 <- t_de_gene$avg_log2FC
names(list1) <- t_de_gene$gene
gsea_list_t <- na.omit(list1) %>% 
  sort(., decreasing = TRUE)
head(gsea_list_t)
##          Pkib F630028O10Rik         Fscn1      Cdc42bpa         Prdm1 
##      8.402034      5.632552      5.529738      5.287390      5.254809 
##        Hbb-bt 
##      4.549129

將activated T cell基因表現量進行GSEA分析

t_gse <- clusterProfiler::gseGO(geneList = gsea_list_t, 
                              ont = "ALL",      # "BP", "MF", "CC"
                              keyType = "SYMBOL", 
                              nPerm = 10000, 
                              minGSSize = 3, 
                              maxGSSize = 800, 
                              pvalueCutoff = 0.05, 
                              verbose = TRUE, 
                              OrgDb = organism, 
                              pAdjustMethod = "none")
## 
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...

創建activated T cell的GSEA dotplot(圖1h)

dotplot(t_gse, split=".sign") + facet_grid(~.sign)  # 圖1h

10.用subset將cancer cell population從combined.obj抓出來,並將k與kl基因表現量進行分析

cancer <- combined.obj %>%
  subset(., subset = new_cluster == c("cancer cell1","cancer cell2"))
cancer <- SetIdent(cancer, value = cancer@meta.data$orig.ident) 
c_de_gene <- FindAllMarkers(cancer) %>% filter(cluster == "kl")
## Calculating cluster k
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster kl
## Warning in mean.fxn(object[features, cells.2, drop = FALSE]): NaNs produced
head(c_de_gene)
##                 p_val avg_log2FC pct.1 pct.2    p_val_adj cluster   gene
## Btla.1   1.015995e-87 -2.2690035 0.007 0.472 2.031990e-84      kl   Btla
## Bank1.1  1.020991e-87 -1.5972041 0.011 0.491 2.041982e-84      kl  Bank1
## Irf4.1   3.398922e-83 -0.8570328 0.021 0.857 6.797844e-80      kl   Irf4
## Ccl8.1   2.221741e-82  0.1315052 0.014 0.534 4.443481e-79      kl   Ccl8
## Incenp.1 6.105497e-82  0.1424943 0.025 0.199 1.221099e-78      kl Incenp
## Knstrn.1 1.067041e-81 -0.2108657 0.025 0.832 2.134083e-78      kl Knstrn

將cancer cell基因表現量進行GSEA分析

organism <- "org.Mm.eg.db"
list2 <- c_de_gene$avg_log2FC
names(list2) <- c_de_gene$gene
gsea_list_c <- na.omit(list2) %>% 
  sort(., decreasing = TRUE)
c_gse <- clusterProfiler::gseGO(geneList = gsea_list_c, 
                              ont = "ALL",      # "BP", "MF", "CC"
                              keyType = "SYMBOL", 
                              nPerm = 10000, 
                              minGSSize = 3, 
                              maxGSSize = 800, 
                              pvalueCutoff = 0.05, 
                              verbose = TRUE, 
                              OrgDb = organism, 
                              pAdjustMethod = "none")
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in fgseaSimple(pathways = pathways, stats = stats, minSize = minSize, :
## There were 12 pathways for which P-values were not calculated properly due to
## unbalanced gene-level statistic values
## leading edge analysis...
## done...

創建cancer cell的GSEA dotplot(圖2a)

dotplot(c_gse, split=".sign") + facet_grid(~.sign)  # 圖2a

11.use down regulated gene list(act_T cell)

t_down_gene <- FindAllMarkers(act_T) %>% filter(avg_log2FC < 0 & cluster == "kl")
## Calculating cluster k
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster kl
## Warning in mean.fxn(object[features, cells.2, drop = FALSE]): NaNs produced
head(t_down_gene)
##           p_val avg_log2FC pct.1 pct.2 p_val_adj cluster    gene
## Egr3.1        0 -0.9327606 0.032 0.940         0      kl    Egr3
## Thbs1.1       0 -0.7934839 0.039 0.914         0      kl   Thbs1
## Mef2c.1       0 -0.3741167 0.022 0.897         0      kl   Mef2c
## Ager.1        0 -0.4077776 0.023 0.889         0      kl    Ager
## Afap1l1.1     0 -1.4697333 0.011 0.876         0      kl Afap1l1
## Alcam.1       0 -0.4896868 0.030 0.887         0      kl   Alcam
down_t <- t_down_gene$avg_log2FC
names(down_t) <- t_down_gene$gene
down_list_t <- na.omit(down_t) %>% 
  sort(., decreasing = FALSE)
go_t <- clusterProfiler::enrichGO(gene = names(down_list_t),
                                  OrgDb = "org.Mm.eg.db",
                                  keyType = "SYMBOL",
                                  ont = "ALL")
head(as.data.frame(go_t))
##            ONTOLOGY         ID                         Description GeneRatio
## GO:0050900       BP GO:0050900                 leukocyte migration    62/649
## GO:0006935       BP GO:0006935                          chemotaxis    62/649
## GO:0042330       BP GO:0042330                               taxis    62/649
## GO:0050727       BP GO:0050727 regulation of inflammatory response    56/649
## GO:0097529       BP GO:0097529         myeloid leukocyte migration    44/649
## GO:0030198       BP GO:0030198   extracellular matrix organization    48/649
##              BgRatio       pvalue     p.adjust       qvalue
## GO:0050900 398/28891 1.150597e-33 5.746083e-30 2.937051e-30
## GO:0006935 462/28891 6.668120e-30 1.419956e-26 7.257959e-27
## GO:0042330 464/28891 8.529972e-30 1.419956e-26 7.257959e-27
## GO:0050727 411/28891 1.995412e-27 2.491272e-24 1.273388e-24
## GO:0097529 253/28891 3.414838e-26 3.410740e-23 1.743364e-23
## GO:0030198 322/28891 2.151251e-25 1.765610e-22 9.024733e-23
##                                                                                                                                                                                                                                                                                                                                                                                    geneID
## GO:0050900 Pf4/Ppbp/Ednra/Il17a/Nkx2-3/Itga2b/Cadm1/Tnfsf11/Mmp2/Ccr1/Ccr2/Ccl17/Apod/Cd177/Ccr6/Mmp14/Thbs1/Itga9/Tnfaip6/C3ar1/Cxcr2/Cxcl3/Ccl21a/Cd200/Kit/Sele/Lbp/Ch25h/Serpine1/Aif1/Cxcl1/S100a14/Ccl20/Cd9/Sirpa/Pla2g7/Ccl7/Cd34/Ager/Il33/Gata3/Edn1/Fcer1g/Tnf/Mmp9/Tlr2/Cd24a/Fpr2/Cxcl14/Vegfa/Podxl/Il23a/S100a8/Flt1/Trem2/Gpr183/Lgals3/Slc12a2/Tnfrsf18/Ccr7/Plvap/Fcgr3
## GO:0006935       Pf4/Ppbp/Ednra/Ccr10/Tnfsf11/Mmp2/Ccr1/Ccr2/Ccl17/Egr3/Ccr6/Nrg1/Sema3e/Thbs1/Itga9/Tnfaip6/C3ar1/Cxcr2/Cxcl3/Sema3d/Ccl21a/Saa3/Ccr4/Kit/Lbp/Ch25h/Serpine1/Aif1/Eng/Slit3/Cxcl1/Ear10/S100a14/Ccl20/Pla2g7/Ccl7/Ager/Sema6a/Edn1/Fcer1g/Mmp9/Nrp2/Lox/Sema6d/Fpr2/Cxcl14/Ryk/Vegfa/Bmpr2/Il23a/Cdh13/S100a8/Flt1/Trem2/Ear1/Gpr183/Lrp1/Lgals3/Slc12a2/Ccr7/Nrp1/Fcgr3
## GO:0042330       Pf4/Ppbp/Ednra/Ccr10/Tnfsf11/Mmp2/Ccr1/Ccr2/Ccl17/Egr3/Ccr6/Nrg1/Sema3e/Thbs1/Itga9/Tnfaip6/C3ar1/Cxcr2/Cxcl3/Sema3d/Ccl21a/Saa3/Ccr4/Kit/Lbp/Ch25h/Serpine1/Aif1/Eng/Slit3/Cxcl1/Ear10/S100a14/Ccl20/Pla2g7/Ccl7/Ager/Sema6a/Edn1/Fcer1g/Mmp9/Nrp2/Lox/Sema6d/Fpr2/Cxcl14/Ryk/Vegfa/Bmpr2/Il23a/Cdh13/S100a8/Flt1/Trem2/Ear1/Gpr183/Lrp1/Lgals3/Slc12a2/Ccr7/Nrp1/Fcgr3
## GO:0050727                                   Il17a/Pde5a/Enpp3/Dnase1l3/Ighg1/Tnfsf11/C2cd4b/Ccr2/Il6/Fcer1a/Cma1/Ctla2a/Fabp4/Lilra5/Fcgr2b/H2-M2/Wfdc1/Tnfaip6/Cd200r3/Fcgr1/Cd200/Ptgs2/Lbp/Ptges/Il10/Gpx2/Serpine1/Mmp8/Pla2g2d/Hyal2/Sirpa/Slc7a2/Ighg2b/Igf1/Slc39a8/Ager/C3/Il33/Gata3/Fcer1g/Tnf/Tlr2/Cd24a/Fpr2/Mefv/Lta/Ier3/S100a8/Trem2/Pbk/Tnc/Ccr7/Cebpa/Lgals1/Mgll/Fcgr3
## GO:0097529                                                                                                               Pf4/Ppbp/Ednra/Il17a/Tnfsf11/Mmp2/Ccr1/Ccr2/Ccl17/Cd177/Mmp14/Thbs1/Itga9/Tnfaip6/C3ar1/Cxcr2/Cxcl3/Ccl21a/Cd200/Kit/Lbp/Serpine1/Aif1/Cxcl1/S100a14/Ccl20/Cd9/Sirpa/Pla2g7/Ccl7/Ager/Edn1/Fcer1g/Mmp9/Tlr2/Fpr2/Vegfa/Il23a/S100a8/Flt1/Trem2/Lgals3/Ccr7/Fcgr3
## GO:0030198                                                                            Aebp1/Colq/Loxl1/Sfrp2/Mmp2/Col4a4/Tnfrsf11b/Il6/Ecm2/Spock2/Cav2/Cma1/Col4a2/Col4a1/Col5a2/Mmp14/Ddr2/Col14a1/Rgcc/Adamts9/Eng/Adamtsl2/Ccdc80/Mmp8/Fbln5/Ramp2/Slc39a8/Col5a1/Col1a1/Loxl2/Tnf/Egfl6/Mmp9/Serpinh1/Lox/Has1/Tgfbi/Col4a5/Itga8/Cst3/Pxdn/Eln/Adamts1/Npnt/Lgals3/Mmp19/Ndnf/Gfod2
##            Count
## GO:0050900    62
## GO:0006935    62
## GO:0042330    62
## GO:0050727    56
## GO:0097529    44
## GO:0030198    48
barplot(go_t, showCategory = 20,   # 圖1h
        title = "Top20 GO pathways downregulated in activated T cells of KL vs. K", 
        label_format = 60)

12.use down regulated gene list(cancer cell)

c_down_gene <- FindAllMarkers(cancer) %>% filter(avg_log2FC < 0 & cluster == "kl")
## Calculating cluster k
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster kl
## Warning in mean.fxn(object[features, cells.2, drop = FALSE]): NaNs produced
head(c_down_gene)
##                  p_val avg_log2FC pct.1 pct.2    p_val_adj cluster    gene
## Btla.1    1.015995e-87 -2.2690035 0.007 0.472 2.031990e-84      kl    Btla
## Bank1.1   1.020991e-87 -1.5972041 0.011 0.491 2.041982e-84      kl   Bank1
## Irf4.1    3.398922e-83 -0.8570328 0.021 0.857 6.797844e-80      kl    Irf4
## Knstrn.1  1.067041e-81 -0.2108657 0.025 0.832 2.134083e-78      kl  Knstrn
## Lmo1.1    4.653159e-81 -2.1982336 0.018 0.814 9.306317e-78      kl    Lmo1
## Dcstamp.1 7.250898e-81 -0.8819352 0.014 0.957 1.450180e-77      kl Dcstamp
down_c <- c_down_gene$avg_log2FC
names(down_c) <- c_down_gene$gene
down_list_c <- na.omit(down_c) %>% 
  sort(., decreasing = FALSE)
go_c <- clusterProfiler::enrichGO(gene = names(down_list_c),
                                  OrgDb = "org.Mm.eg.db",
                                  keyType = "SYMBOL",
                                  ont = "BP")
head(as.data.frame(go_c))
##                    ID                            Description GeneRatio
## GO:0050900 GO:0050900                    leukocyte migration    58/506
## GO:0006935 GO:0006935                             chemotaxis    58/506
## GO:0042330 GO:0042330                                  taxis    58/506
## GO:0060326 GO:0060326                        cell chemotaxis    50/506
## GO:0050867 GO:0050867 positive regulation of cell activation    56/506
## GO:0050727 GO:0050727    regulation of inflammatory response    53/506
##              BgRatio       pvalue     p.adjust       qvalue
## GO:0050900 398/28891 8.051469e-36 3.637654e-32 2.117960e-32
## GO:0006935 462/28891 3.192707e-32 6.089368e-29 3.545428e-29
## GO:0042330 464/28891 4.043405e-32 6.089368e-29 3.545428e-29
## GO:0060326 331/28891 1.001735e-31 1.131459e-28 6.587724e-29
## GO:0050867 454/28891 9.996200e-31 9.032566e-28 5.259053e-28
## GO:0050727 411/28891 4.431942e-30 3.337252e-27 1.943057e-27
##                                                                                                                                                                                                                                                                                                                                                        geneID
## GO:0050900 Ctsg/Tnfsf11/Ccl12/Crtam/C3ar1/Cxcl13/F7/Ccl21a/Ccl4/Ednrb/Trem2/Ccl24/Vegfc/Ccr7/Cxcr1/Mmp2/Ccr2/Ccl7/Il1b/Aif1/Itgam/Tlr2/Csf1r/Ccl9/Ccl2/Sirpa/Tnfsf4/Ch25h/Slamf9/Ccl6/Fcgr3/Pycard/Ccr1/Cx3cr1/Adam8/Ccl3/Cd300a/Fcer1g/Tnfrsf18/Trem1/Jaml/Pgf/Lgals3/Gpr183/Ptafr/Pla2g7/Itga2b/Flt1/Selp/Eps8/Cyp7b1/Bst1/Thbs1/Pecam1/Ppbp/Lyve1/Ecm1/Tnf
## GO:0006935             Ctsg/Tnfsf11/Ccl12/C3ar1/Cxcl13/F7/Ccl21a/Ccl4/Ednrb/Saa3/Ccr4/Trem2/Ccl24/Vegfc/Ccr7/Cxcr1/Mmp2/Ccr2/Ccl7/Il1b/Aif1/Itgam/Csf1r/Ccl9/Ccl2/Ch25h/Nrg1/Ccr10/Slamf9/Ccl6/Lrp1/Fcgr3/Ccr1/Cx3cr1/Adam8/Ccl3/Ear2/Fcer1g/Trem1/Hmgb2/Jaml/Fn1/Pgf/Lgals3/Ear1/Gpr183/Ccrl2/Ccr5/Ptafr/Nrp2/Pla2g7/Flt1/Dock4/Cyp7b1/Bst1/Thbs1/Cxcr6/Ppbp
## GO:0042330             Ctsg/Tnfsf11/Ccl12/C3ar1/Cxcl13/F7/Ccl21a/Ccl4/Ednrb/Saa3/Ccr4/Trem2/Ccl24/Vegfc/Ccr7/Cxcr1/Mmp2/Ccr2/Ccl7/Il1b/Aif1/Itgam/Csf1r/Ccl9/Ccl2/Ch25h/Nrg1/Ccr10/Slamf9/Ccl6/Lrp1/Fcgr3/Ccr1/Cx3cr1/Adam8/Ccl3/Ear2/Fcer1g/Trem1/Hmgb2/Jaml/Fn1/Pgf/Lgals3/Ear1/Gpr183/Ccrl2/Ccr5/Ptafr/Nrp2/Pla2g7/Flt1/Dock4/Cyp7b1/Bst1/Thbs1/Cxcr6/Ppbp
## GO:0060326                                                      Ctsg/Tnfsf11/Ccl12/C3ar1/Cxcl13/F7/Ccl21a/Ccl4/Ednrb/Saa3/Ccr4/Ccl24/Vegfc/Ccr7/Cxcr1/Mmp2/Ccr2/Ccl7/Il1b/Aif1/Itgam/Csf1r/Ccl9/Ccl2/Ch25h/Ccr10/Slamf9/Ccl6/Fcgr3/Ccr1/Cx3cr1/Adam8/Ccl3/Fcer1g/Trem1/Hmgb2/Jaml/Pgf/Lgals3/Gpr183/Ccrl2/Ccr5/Pla2g7/Flt1/Dock4/Cyp7b1/Bst1/Thbs1/Cxcr6/Ppbp
## GO:0050867     Ms4a2/Tnfsf11/Foxp3/Ccl21a/Cd160/Il10/Cd209a/Il18/Trem2/Tnfrsf4/Il1rl1/Havcr2/Ccr7/Ccr2/Cd86/Il1b/Aif1/Itgam/Csf1r/Ccl2/Il5/Sirpa/Tnfsf4/Pycard/Lilra5/Adam8/Tyrobp/Lgals1/Fcer1g/Tnfrsf13c/Cd274/Cd209d/Mef2c/Gpr183/Cdkn1a/Igf1/Actb/Clec4d/Ptafr/Zbtb16/Acta2/Nlrp3/Ighm/Il6/Axl/Cd83/Selp/Icos/Nfkbiz/Bst1/Thbs1/Cd40/Gata2/Tnf/Malt1/Ighd
## GO:0050727                              Dnase1l3/Tnfsf11/Tnc/Snca/Cd200r3/Foxp3/Il10/Ednrb/Ptgis/Il18/Trem2/Il1rl1/Il22ra2/Ccl24/Ccr7/Ccr2/Il1b/Tlr2/Csf1r/Osm/Enpp3/Mefv/Sirpa/Tnfsf4/Ctss/Acp5/Fcgr3/Pycard/Ahr/Cx3cr1/Lilra5/Adam8/Ccl3/Alox5ap/Lgals1/Fcer1g/Apoe/Slc7a2/Lpl/Igf1/Ccr5/Nkg7/Nlrp3/Cd44/Il6/Fcgr1/Ier3/Nfkbiz/Fcgr2b/Bst1/Tnf/Pglyrp1/Cma1
##            Count
## GO:0050900    58
## GO:0006935    58
## GO:0042330    58
## GO:0060326    50
## GO:0050867    56
## GO:0050727    53
dotplot(go_c, showCategory = 20, 
        title = "EnrichmentGO_BP Top20 GO pathways downregulated in KL vs. K",
        label_format = 60)  # 圖2a